class: title-slide, center, bottom

<!-- <iframe src="https://koaning.io/iframes/predict-bw.html" allowfullscreen scrolling="no"></iframe> -->
<iframe src="assets/html/linear_interpolate.html" title="Linear interpolation" allowfullscreen scrolling="no"></iframe>

<style type="text/css">
/* Panelset tabs: tab font, plus background and border colors of the active tab. */
.panelset {
  --panel-tab-font-family: Work Sans;
  --panel-tab-background-color-active: #fffbe0;
  --panel-tab-border-color-active: #023d4d;
}
.panelset .panel-tabs .panel-tab > a {
  color: #023d4d;
}
/* Turquoise background for code rendered inside a .code-bg-turq container. */
.code-bg-turq .remark-code,
.code-bg-turq .remark-code * {
  background-color: #DAFBF7 !important;
}
</style>

<script type="text/javascript">
// Click-to-flip behavior for every .flip-card element present when this script runs.
// A click toggles the 'flip' class and resizes the card; the 'animating' class acts
// as a lock so that clicks arriving during a flip are ignored.
document.querySelectorAll('.flip-card').forEach(card => {
  card.addEventListener('click', function() {
    // Prevent any action if the card is already in the process of flipping.
    if (this.classList.contains('animating')) {
      return;
    }
    const cardBack = this.querySelector('.flip-card-back');
    const isFlipped = this.classList.contains('flip');
    // Mark the card as animating to prevent further clicks during the transition.
    this.classList.add('animating');
    if (!isFlipped) {
      // Flipping to the back: grow the card to fit the back content. The extra 40px
      // was described by the original author as double the padding height.
      // A card without a back face keeps its current height; reading scrollHeight
      // on null would throw and leave the card locked in 'animating' forever.
      if (cardBack) {
        this.style.height = `${cardBack.scrollHeight + 40}px`;
      }
    } else {
      // Flipping back to the front: restore the 200px front height.
      this.style.height = "200px";
    }
    // Toggle the flip state after setting the height.
    this.classList.toggle('flip');
    // Release the lock after 800 ms, intended to match the flip transition
    // duration (the transition itself is defined outside this section).
    setTimeout(() => {
      this.classList.remove('animating');
    }, 800);
  });
});
</script>

<span style="color: #7D6D61; font-family:'Times New Roman'; font-size:24px"><i>Lecture Series in Data Science</i></span>

# Day 1 - Connecting the Dots:
## <b>Bridging Theory and Practice in Machine Learning</b>
### <span style="color: #7D6D61;"><b><i><span style='color:#096B72;text-decoration-thickness:4px;'><span>F. K. Saforo @</span> KNUST on May 22, 2024</span></i></b></span>

---
class: intro-slide, center, middle

# A little more about me
<img style="border-radius: 70%;" src="https://avatars.githubusercontent.com/u/36967151?v=4" width="150px"/> -- <svg viewBox="0 0 512 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M458.4 64.3C400.6 15.7 311.3 23 256 79.3 200.7 23 111.4 15.6 53.6 64.3-21.6 127.6-10.6 230.8 43 285.5l175.4 178.7c10 10.2 23.4 15.9 37.6 15.9 14.3 0 27.6-5.6 37.6-15.8L469 285.6c53.5-54.7 64.7-157.9-10.6-221.3zm-23.6 187.5L259.4 430.5c-2.4 2.4-4.4 2.4-6.8 0L77.2 251.8c-36.5-37.2-43.9-107.6 7.3-150.7 38.9-32.7 98.9-27.8 136.5 10.5l35 35.7 35-35.7c37.8-38.5 97.8-43.2 136.5-10.6 51.1 43.1 43.5 113.9 7.3 150.8z"></path></svg> Mathematician <br> -- <svg viewBox="0 0 576 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M271.06,144.3l54.27,14.3a8.59,8.59,0,0,1,6.63,8.1c0,4.6-4.09,8.4-9.12,8.4h-35.6a30,30,0,0,1-11.19-2.2c-5.24-2.2-11.28-1.7-15.3,2l-19,17.5a11.68,11.68,0,0,0-2.25,2.66,11.42,11.42,0,0,0,3.88,15.74,83.77,83.77,0,0,0,34.51,11.5V240c0,8.8,7.83,16,17.37,16h17.37c9.55,0,17.38-7.2,17.38-16V222.4c32.93-3.6,57.84-31,53.5-63-3.15-23-22.46-41.3-46.56-47.7L282.68,97.4a8.59,8.59,0,0,1-6.63-8.1c0-4.6,4.09-8.4,9.12-8.4h35.6A30,30,0,0,1,332,83.1c5.23,2.2,11.28,1.7,15.3-2l19-17.5A11.31,11.31,0,0,0,368.47,61a11.43,11.43,0,0,0-3.84-15.78,83.82,83.82,0,0,0-34.52-11.5V16c0-8.8-7.82-16-17.37-16H295.37C285.82,0,278,7.2,278,16V33.6c-32.89,3.6-57.85,31-53.51,63C227.63,119.6,247,137.9,271.06,144.3ZM565.27,328.1c-11.8-10.7-30.2-10-42.6,0L430.27,402a63.64,63.64,0,0,1-40,14H272a16,16,0,0,1,0-32h78.29c15.9,0,30.71-10.9,33.25-26.6a31.2,31.2,0,0,0,.46-5.46A32,32,0,0,0,352,320H192a117.66,117.66,0,0,0-74.1,26.29L71.4,384H16A16,16,0,0,0,0,400v96a16,16,0,0,0,16,16H372.77a64,64,0,0,0,40-14L564,377a32,32,0,0,0,1.28-48.9Z"></path></svg> Data Scientist: Research/Build novel ML tools for actuarial loss reserving<br> -- <svg viewBox="0 0 576 512" 
style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M208 0c-29.9 0-54.7 20.5-61.8 48.2-.8 0-1.4-.2-2.2-.2-35.3 0-64 28.7-64 64 0 4.8.6 9.5 1.7 14C52.5 138 32 166.6 32 200c0 12.6 3.2 24.3 8.3 34.9C16.3 248.7 0 274.3 0 304c0 33.3 20.4 61.9 49.4 73.9-.9 4.6-1.4 9.3-1.4 14.1 0 39.8 32.2 72 72 72 4.1 0 8.1-.5 12-1.2 9.6 28.5 36.2 49.2 68 49.2 39.8 0 72-32.2 72-72V64c0-35.3-28.7-64-64-64zm368 304c0-29.7-16.3-55.3-40.3-69.1 5.2-10.6 8.3-22.3 8.3-34.9 0-33.4-20.5-62-49.7-74 1-4.5 1.7-9.2 1.7-14 0-35.3-28.7-64-64-64-.8 0-1.5.2-2.2.2C422.7 20.5 397.9 0 368 0c-35.3 0-64 28.6-64 64v376c0 39.8 32.2 72 72 72 31.8 0 58.4-20.7 68-49.2 3.9.7 7.9 1.2 12 1.2 39.8 0 72-32.2 72-72 0-4.8-.5-9.5-1.4-14.1 29-12 49.4-40.6 49.4-73.9z"></path></svg> Writer - Poems and essays about all things life<br> [<svg viewBox="0 0 496 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 
1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"></path></svg>fsaforo1](https://github.com/fsaforo1); [<svg viewBox="0 0 512 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M464 64H48C21.49 64 0 85.49 0 112v288c0 26.51 21.49 48 48 48h416c26.51 0 48-21.49 48-48V112c0-26.51-21.49-48-48-48zm0 48v40.805c-22.422 18.259-58.168 46.651-134.587 106.49-16.841 13.247-50.201 45.072-73.413 44.701-23.208.375-56.579-31.459-73.413-44.701C106.18 199.465 70.425 171.067 48 152.805V112h416zM48 400V214.398c22.914 18.251 55.409 43.862 104.938 82.646 21.857 17.205 60.134 55.186 103.062 54.955 42.717.231 80.509-37.199 103.053-54.947 49.528-38.783 82.032-64.401 104.947-82.653V400H48z"></path></svg>frankforo52@gmail.com](mailto:frankforo52@gmail.com) --- class: middle, center, inverse # .bold[Disclaimer: All opinions are my own, and do not reflect any company or persons I am associated with.] --- class: middle center hide-count background-image: url(assets/images/fabrication.jpg) background-size: cover # .bold[Before we proceed...] --- class: intro-slide <span style="color:#023d4d; font-weight:500; font-size:32px"><b>BIG PICTURE</b>: <i>Things I hope to reinforce and hope you (re-)discover today...</i></span> <!-- # <b>BIG PICTURE</b>: *Things I hope to reinforce and hope you (re-)discover today...* --> <br> --
<b>Mathematics</b>, like Twi, Mandarin or English, is a tool for <b>abstraction</b> or thinking. You are literally being taught how to **universally** formalize abstraction. --
<b>Like a Carpenter's apprentice, you are currently in training</b>: A very specific skill-set or level of mastery is expected of you at the end of your undergraduate training. --
<b>Like a scaffold,</b> your undergraduate curriculum provides **essential building blocks** for **future work**. Always ensure you <b>see the scaffold</b>! Always ensure you can *intuit* about or abstract from what you learn. -- .bg-washed-green.b--dark-green.ba.bw2.br3.shadow-5.ph4.mt5[ Focus on fundamental **intuition-building** (*scaffolding*) through **inquiry**, **exposure** and **practice**! ] --- class: intro-slide <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Today's Focus (Local Picture):</b> Helping you see the *scaffolding*</span> .panelset[ .panel[.panel-name[The Opportunity] .pull-left[ .bg-washed-green.b--dark-green.ba.bw2.br3.shadow-5.ph4.mt5[The ability to intuitively grasp fundamental concepts is critical for future work ] Some factors hampering intuition-building: <ul> <li>Students (novices or apprentices) don't know what they don't know</li> <li>Practical real-life exposure is either infeasible or rarely available</li> </ul> ] .pull-right[ <br> <img src="assets/images/strange.JPG" width="60%" style="display: block; margin: auto;" /> > <center><span style="color:#023d4d; font-weight:500; font-size:18px"><b>Build cognitive intuition to link symbols to clear mental abstractions</b></span></center> ] ] .panel[.panel-name[Infinity War] <img src="assets/images/len_alg.jpeg" width="70%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Endgame]
> <center><span style="color:#023d4d; font-weight:500; font-size:24px"><b>Build a cognition that spans all layers of mathematical abstraction</b></span></center> ] .panel[.panel-name[The Multiverse]
] ] --- class: intro-slide <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Today's Focus (Local Picture):</b> Helping you see the necessary *scaffolding* for Data Science</span> <div class="flip-card-container"> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Core Mathematical Foundations</h1> <p>Fundamental theories and concepts that form the bedrock of all mathematical applications.</p> </div> <div class="flip-card-back"> <h1>Core Mathematical Foundations</h1> <ul> <li><b>Calculus (I, II, III)</b>: Essential for understanding dynamic changes in systems across sciences.</li> <li><b>Linear Algebra</b>: Crucial for handling data structures and operations in computational fields.</li> <li><b>Real Analysis</b>: Provides a rigorous understanding of continuous functions and their properties.</li> </ul> </div> </div> </div> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Computational and Numerical Methods</h1> <p>Focuses on algorithmic and numerical solutions to solve practical mathematical problems.</p> </div> <div class="flip-card-back"> <h1>Computational and Numerical Methods</h1> <ul> <li><b>Numerical Methods</b>: Focuses on creating algorithms to solve complex mathematical problems numerically.</li> <li><b>Differential Equations</b>: Used to model and solve problems involving changes over some space or time.</li> </ul> </div> </div> </div> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Statistical and Probabilistic Analysis</h1> <p>Encompasses methods and theories for analyzing data and managing uncertainty in predictions.</p> </div> <div class="flip-card-back"> <h1>Statistical and Probabilistic Analysis</h1> <ul> <li><b>Statistics</b>: Critical for making informed decisions based on data analysis in multiple fields.</li> <li><b>Probability</b>: Fundamental for assessing risks and probabilities in uncertain conditions.</li> 
<li><b>Topology</b>: Offers a deep insight into the shapes and features of complex data sets.</li> </ul> </div> </div> </div> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Abstract and Discrete Structures</h1> <p>Explores abstract algebraic structures and discrete systems, foundational in cryptography and computer science.</p> </div> <div class="flip-card-back"> <h1>Abstract and Discrete Structures</h1> <ul> <li><b>Discrete Mathematics</b>: Essential for understanding algorithms, graph theory, and logical structures.</li> <li><b>Abstract Algebra</b>: Focuses on the study of algebraic structures which is crucial in cryptography and theoretical computer science.</li> </ul> </div> </div> </div> </div> --- class: middle, center, inverse # .bold[Let's begin!] --- class: intro-slide <span style="color:#023d4d; font-weight:500; font-size:32px"><b>What is Data Science?</b></span> .panelset[ .panel[.panel-name[Examples] <img src="assets/images/ds_01.png" width="65%" style="display: block; margin: auto;" /> > <center><span style="color:#023d4d; font-weight:500; font-size:24px"><b>Identifying patterns by exposure to several examples</b></span></center> ] .panel[.panel-name[Experiences] <img src="assets/images/ds_02.jpg" width="70%" style="display: block; margin: auto;" /> > <center><span style="color:#023d4d; font-weight:500; font-size:24px"><b>Learning through rules and several experiences</b></span></center> ] .panel[.panel-name[Learning is difficult] <img src="assets/images/ds_03.png" width="60%" style="display: block; margin: auto;" /> > <center><span style="color:#023d4d; font-weight:500; font-size:24px"><b>Not all experiences can be adequately explained</b></span></center> ] .panel[.panel-name[Definition] .pull-left[ .bg-washed-green.b--dark-green.ba.bw2.br3.shadow-5.ph4.mt5[ Machine Learning (ML) provides systems the ability to *automatically* learn from **experience** and/or **examples** without being **explicitly 
programmed** ] ] .pull-right[ <img src="assets/images/ds_vs_ml2.png" width="72%" style="display: block; margin: auto;" /> ] ] ] --- class: intro-slide <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Where do we use Machine Learning (ML)?</b></span> -- <p> <span style="font-size:30px"><b>Inside other software</b></span><br> <span style="font-size:24px">Unlock phone with your face, search with a voice command</span><br> </p> -- <p> <span style="font-size:30px"><b>In analytics, data mining</b></span><br> <span style="font-size:24px">Find typical clusters of users, predict spikes in stock prices</span><br> </p> -- <p> <span style="font-size:30px"><b>In science</b></span><br> <span style="font-size:24px">Predict the structure of proteins, drug discovery, discover the functional form of a system</span> </p> --- class: intro-slide <span style="color:#023d4d; font-weight:500; font-size:32px"><b>What makes a suitable machine learning problem?</b></span> - Can't be **solved explicitly** - Approximate solutions are fine - Limited reliability, predictability, interpretability is fine - Plenty of examples to learn from .pull-left[ **bad**<br> clinical decisions<br> parole decision *support* ] .pull-right[ **good**<br> clinical decision *support*<br> recommending a movie weather predictions stock price predictions ] --- ## Scenario 1 - Inferences .panelset[ .panel[.panel-name[Scenario] <b>Which vaccine is more effective?</b> .bg-washed-green.b--dark-green.ba.bw2.br3.shadow-5.ph4.mt5[ <p><b>Vaccine A:</b> Tested on <b>100</b> people in total, proved effective in <b>90</b> people</p> <p><b>Vaccine B:</b> Tested on <b>1000</b> people in total, proved effective in <b>850</b> people</p> .tr[ — <b>The COVID-19 Vaccine Problem</b> ]] ] .panel[.panel-name[Related Examples]
] ] --- ## Scenario 2 - Groupings and Patterns .panelset[ .panel[.panel-name[Scenario] <img src="assets/images/recs2.PNG" width="75%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Related Examples]
] ] --- ## Scenario 3 - Non-class Predictions .panelset[ .panel[.panel-name[Scenario] <iframe src="https://covid19-projections.com/path-to-normality-comparison.html" width="100%" height="400px" data-external="1"></iframe> ] .panel[.panel-name[Related Examples]
] ] --- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Working Example:</b> ML in Insurance</span> .panelset[ .panel[.panel-name[Insurance Overview] - **Typical Businesses:** - The cost of production (materials, labor, overhead) is typically **known before** setting the price of goods/services. - **Insurance Businesses:** - The full cost of claims (i.e., the "cost of production") is typically **not known** ahead of pricing. - The ultimate cost of claims is uncertain: litigation, inflation, re-opens, etc. - Pricing is based on risk and actuarial analysis, and historical claims data. .bg-washed-green.b--dark-green.ba.bw2.br3.shadow-5.ph4.mt5[<b>Actuarial Reserving</b> is the process of estimating the amount of money that an insurance company needs to set aside to pay future claims and expenses arising from policies that have already been sold ] ] .panel[.panel-name[The Actuarial Use-Case] <img src="assets/images/loss_triangle.png" width="70%" style="display: block; margin: auto;" /> > <center><span style="color:#023d4d; font-weight:500; font-size:24px"><b>Illustration of an actuarial loss triangle</b></span></center> .bg-washed-green.b--dark-green.ba.bw2.br3.shadow-5.ph4.mt5[ THE USE-CASE: At any given age of an accident cohort, predict the ultimate cost of Bodily Injury (BI) claims in that cohort. ] ] .panel[.panel-name[The ML Solution] <div class="flip-card-container"> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Claim Details</h1> </div> <div class="flip-card-back"> <h1>Claim Details</h1> <ul> <li>Report date/lag</li> <li>Claimant history</li> <li>Third party involvement and subrogation</li> </ul> </div> </div> </div> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Accident Details</h1> </div> <div class="flip-card-back"> <h1>Accident Details</h1> <ul> <li>Time of day</li> <li>Weather conditions</li> <li>Location (urban vs. 
rural)</li> <li>Type of accident (e.g., collision, slip and fall)</li> <li>Number of vehicles involved</li> <li>Police report narratives</li> <li>Accident scene photos</li> <li>Witness statements</li> </ul> </div> </div> </div> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Medical Factors</h1> </div> <div class="flip-card-back"> <h1>Medical Factors</h1> <ul> <li>Initial injury severity</li> <li>Pre-existing conditions</li> <li>Medical history</li> <li>Time to first treatment</li> <li>Type of medical treatment received</li> <li>Medical imaging (X-rays, MRIs)</li> <li>Doctor’s notes and reports</li> <li>Treatment plans</li> </ul> </div> </div> </div> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Legal and Regulatory Environment</h1> </div> <div class="flip-card-back"> <h1>Legal and Regulatory Environment</h1> <ul> <li>Jurisdiction-specific laws</li> <li>Legal representation (presence and type)</li> <li>Regulatory changes</li> <li>Court settlement trends</li> <li>Legal documents and case files</li> <li>Lawyer’s correspondence</li> </ul> </div> </div> </div> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Insurance Policy Details</h1> </div> <div class="flip-card-back"> <h1>Insurance Policy Details</h1> <ul> <li>Coverage limits</li> <li>Deductibles and co-pays</li> <li>Policy exclusions</li> <li>Length of coverage</li> </ul> </div> </div> </div> </div> <img src="assets/images/act_ml.png" width="55%" style="display: block; margin: auto;" /> ] ] --- class: middle, center, inverse # .bold[10-min break. Let's begin the scaffolding!] 
--- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Scaffolds are the essential support for building</b></span> <img src="assets/images/owl.png" width="45%" style="display: block; margin: auto;" /> .bg-washed-green.b--dark-green.ba.bw2.br3.shadow-5.ph4.mt5[Most of your undergraduate education may feel and look like (1) ] --- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>The <i>owl</i> of our working problem is the production ML solution</b></span> `main.py` ``` # Sketch of a typical ML project program from src import * def main(): collect_data() process_data() train_model() make_predictions() deploy() if __name__ == "__main__": main() ``` .bg-washed-green.b--dark-green.ba.bw2.br3.shadow-5.ph4.mt5[The <b>Scaffolds</b> are the essential theoretical and practical building blocks ] --- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Let's explore the scaffold for the <i>process_data()</i> phase of the ML solution</b></span> - What does it take to process the data below? - What theory supports this process? - What practical considerations are necessary? <br> <div class="flip-card-container"> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Claim Details</h1> </div> <div class="flip-card-back"> <h1>Claim Details</h1> <ul> <li>Report date/lag</li> <li>Claimant history</li> <li>Third party involvement and subrogation</li> </ul> </div> </div> </div> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Accident Details</h1> </div> <div class="flip-card-back"> <h1>Accident Details</h1> <ul> <li>Time of day</li> <li>Weather conditions</li> <li>Location (urban vs. 
rural)</li> <li>Type of accident (e.g., collision, slip and fall)</li> <li>Number of vehicles involved</li> <li>Police report narratives</li> <li>Accident scene photos</li> <li>Witness statements</li> </ul> </div> </div> </div> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Medical Factors</h1> </div> <div class="flip-card-back"> <h1>Medical Factors</h1> <ul> <li>Initial injury severity</li> <li>Pre-existing conditions</li> <li>Medical history</li> <li>Time to first treatment</li> <li>Type of medical treatment received</li> <li>Medical imaging (X-rays, MRIs)</li> <li>Doctor’s notes and reports</li> <li>Treatment plans</li> </ul> </div> </div> </div> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Legal and Regulatory Environment</h1> </div> <div class="flip-card-back"> <h1>Legal and Regulatory Environment</h1> <ul> <li>Jurisdiction-specific laws</li> <li>Legal representation (presence and type)</li> <li>Regulatory changes</li> <li>Court settlement trends</li> <li>Legal documents and case files</li> <li>Lawyer’s correspondence</li> </ul> </div> </div> </div> <div class="flip-card"> <div class="flip-card-inner"> <div class="flip-card-front"> <h1>Insurance Policy Details</h1> </div> <div class="flip-card-back"> <h1>Insurance Policy Details</h1> <ul> <li>Coverage limits</li> <li>Deductibles and co-pays</li> <li>Policy exclusions</li> <li>Length of coverage</li> </ul> </div> </div> </div> </div> --- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Let's explore the scaffold for the <i>process_data()</i> phase of the ML solution</b></span> <img src="assets/images/tab_1.png" width="80%" style="display: block; margin: auto;" /> -- > <span style="color:#023d4d; font-weight:500; font-size:22px"><b>How do we perform mathematical operations that leverage the relationships among the data to predict the ultimate cost of claims?</b></span> <img src="assets/images/act_ml.png" 
width="40%" style="display: block; margin: auto;" /> --- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>The data needs to be numerically encoded to allow any mathematical operations</b></span> <img src="assets/images/tab_2.png" width="80%" style="display: block; margin: auto;" /> .bg-washed-green.b--dark-green.ba.bw2.br3.shadow-5.ph4.mt5[Computers <i>do not understand</i> raw letters and images ] --- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Each feature (or column) needs to be also seen as a single unit for efficient mathematical operations</b></span> <!-- $$ --> <!-- \begin{align*} --> <!-- \text{Cost_of_Claims} &= 244.56 + 3000 \cdot \sin(\text{Time_of_Day}) \\ --> <!-- &\quad + 400 \cdot \ln(\text{Pre_exist_Cond} + 1) \\ --> <!-- &\quad + 5000 \cdot (\text{Legal_Rep})^2 \\ --> <!-- &\quad + 345 \cdot e^{\text{Police_Rprt}} \\ --> <!-- &\quad + 500 \cdot \sqrt{\text{Inj_desc}} \\ --> <!-- &\quad + 7000 \cdot \text{crash_photo} \\ --> <!-- &\quad - 3.6 \cdot (\text{Time_of_Day} \cdot \text{Pre_exist_Cond}) \\ --> <!-- &\quad + 9.6 \cdot (\text{Legal_Rep} \cdot \text{Police_Rprt}) \\ --> <!-- &\quad \times 0.5 \cdot (\text{claim_id} \cdot \text{Inj_desc}) --> <!-- \end{align*} --> <!-- $$ --> <img src="assets/images/eqn.png" width="40%" style="display: block; margin: auto;" /> <img src="assets/images/tab_2.png" width="70%" style="display: block; margin: auto;" /> --- class: middle, right, intro-slide background-image: url(assets/images/minions-02_bg.png) background-size: 35% background-position: bottom left # Anyone recognize this **Scaffold** yet? 
--- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>The entire data must also be seen as a single unit since the model or function would ingest the entire information in whole (or batches)</b></span> <img src="assets/images/act_ml.png" width="60%" style="display: block; margin: auto;" /> <img src="assets/images/tab_3.png" width="70%" style="display: block; margin: auto;" /> --- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Let's break down the *scaffold*. We need a tool that:</b></span> - leverages **function(s)** to perform large-scale mathematical operations; - on **data blocks** representing **specific information** that **relate together** to predict some task <br> -- <img src="assets/images/tab_4.png" width="67%" style="display: block; margin: auto;" /> --- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Linear Algebra to the rescue!</b></span> <img src="assets/images/tab_5.png" width="70%" style="display: block; margin: auto;" /> .bg-washed-green.b--dark-green.ba.bw2.br3.shadow-5.ph4.mt5[The scaffold for this crucial task, <i>process_data()</i> is **vector mechanics** from **Linear Algebra** ] --- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Why is it important to know the underpinnings of the Scaffold?</b></span> - Knowing how to manipulate or design **complex data structures** is EXTREMELY essential to innovation in machine learning and statistics. 
- Linear Algebra is extremely important to data management and computation at large scale - This might make you take your Vector Mechanics class a little more seriously :) --- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Let's recap what we've learnt so far</b></span> - Most of your undergraduate courses are introductory, and are essential building blocks (scaffold) - Being able to identify <i>scaffolds</i> in complex systems can enhance your understanding and intuition <br> -- <span style="color:#023d4d; font-weight:500; font-size:32px"><b>Up Next...</b></span> - Look at the scaffolds for the next phase of the working ML project, *designing the model and making decisions* --- name: goodbye class: right, bottom background-color: #e6f3fc background-image: url(assets/images/boiler.png) background-size: 25% background-position: bottom left <img style="border-radius: 50%;" src="https://avatars.githubusercontent.com/u/36967151?v=4" width="150px"/> # Any questions? ## <i>find resources at...</i> [<svg viewBox="0 0 496 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 
83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"></path></svg>ds-talk](https://github.com/fsaforo1/ds_talk_2024) [<svg viewBox="0 0 512 512" style="height:1em;position:relative;display:inline-block;top:.1em;" xmlns="http://www.w3.org/2000/svg"> <path d="M464 64H48C21.49 64 0 85.49 0 112v288c0 26.51 21.49 48 48 48h416c26.51 0 48-21.49 48-48V112c0-26.51-21.49-48-48-48zm0 48v40.805c-22.422 18.259-58.168 46.651-134.587 106.49-16.841 13.247-50.201 45.072-73.413 44.701-23.208.375-56.579-31.459-73.413-44.701C106.18 199.465 70.425 171.067 48 152.805V112h416zM48 400V214.398c22.914 18.251 55.409 43.862 104.938 82.646 21.857 17.205 60.134 55.186 103.062 54.955 42.717.231 80.509-37.199 103.053-54.947 49.528-38.783 82.032-64.401 104.947-82.653V400H48z"></path></svg>Frank.Saforo](mailto:Frank.Saforo@LibertyMutual.com)